#############################################################################################
## Replication code for:                                                                   ##
## A Male Hostility Spiral? Polarized Communication among Political Elites on Social Media ##
## Step 3.                                                                                 ##
#############################################################################################

library(tidyverse)
library(readxl)
library(stringr)
library(stringi)

# ---- Load inputs ----
# User-level table and the two interaction data sets read by this step.
user_data <- read_excel("user_data.xlsx")
data1 <- read.csv("data1.csv")
data2 <- read.csv("data2.csv")

# Parse the date columns and derive a month-level factor (each date floored to
# the first day of its month). floor_date() is called with its namespace
# because library(tidyverse) only attaches lubridate from tidyverse 2.0.0 on.
data1$date <- as.Date(data1$date)
data1$monthyear <- factor(lubridate::floor_date(data1$date, "month"))

data2$date <- as.Date(data2$date)
data2$monthyear <- factor(lubridate::floor_date(data2$date, "month"))

# Lookup tables: country names and party identifiers.
countries <- read.csv("countries_data.csv")
party_data <- read_excel("party_data.xlsx")

# External data sets that are not shipped with the replication files.
manifesto_data <- read_csv("MPDataset_MPDS2024a.csv") # needs to be downloaded
parlgov_data <- read_csv("view_cabinet.csv") # needs to be downloaded
whogov_data <- read_excel("whogov.xlsx") # needs to be downloaded

# Build a normalised matching key `name2` from the WhoGov `name` column, one
# transformation per step, then drop rows for which no key could be built.
whogov_data <- whogov_data %>%
  mutate(
    # Transliterate to ASCII and lower-case.
    name2 = stri_trans_general(name, "Latin-ASCII"),
    name2 = tolower(name2),
    # Remove double-quoted segments, digits and full stops.
    name2 = str_remove_all(name2, "\"[^\"]+\""),
    name2 = str_remove_all(name2, "[0-9]"),
    name2 = str_remove_all(name2, "\\."),
    # Replace a single letter surrounded by whitespace with one space.
    name2 = str_replace_all(name2, "\\s+[a-z]\\s+", " "),
    name2 = str_squish(name2),
    # For names of three or more tokens keep only the first and last token.
    name2 = str_replace_all(name2, "^(\\S+)\\s+.*\\s+(\\S+)$", "\\1 \\2")
  ) %>%
  filter(!is.na(name2))

# ---- Cabinet membership ----
# A user gets cabinet_member = 1 when WhoGov has a row with the same country
# and the same normalised name (`name2`) in any of the years 2016-2018, and 0
# otherwise. Users with a missing country or name are never matched: with the
# previous row-by-row `==` filter an NA comparison selected NA rows, so such
# users were flagged as cabinet members by accident.
# NOTE(review): `user_data$name2` is not created in this script; presumably it
# is already present in user_data.xlsx and normalised the same way as the
# WhoGov `name2` column -- confirm.
if (!all(c("country", "name2") %in% names(user_data))) {
  stop("user_data must contain the columns `country` and `name2`", call. = FALSE)
}

user_data$cabinet_member <- local({
  key_sep <- "\x1f" # unit separator; keeps country/name pairs unambiguous

  usable <- whogov_data$year %in% 2016:2018 &
    !is.na(whogov_data$country_name) &
    !is.na(whogov_data$name2)
  whogov_keys <- paste(
    whogov_data$country_name[usable],
    whogov_data$name2[usable],
    sep = key_sep
  )

  user_keys <- paste(user_data$country, user_data$name2, sep = key_sep)
  matched <- !is.na(user_data$country) &
    !is.na(user_data$name2) &
    user_keys %in% whogov_keys
  as.numeric(matched)
})

# Missing head-of-government flags count as 0; heads of government are always
# treated as cabinet members.
user_data$is_head_of_gov[is.na(user_data$is_head_of_gov)] <- 0
user_data$cabinet_member[user_data$is_head_of_gov == 1] <- 1

# Attach the Manifesto Project and ParlGov party identifiers to every user.
# all.x keeps users whose party_id has no match in party_data.
user_data <- merge(
  x = user_data,
  y = party_data[c("party_id", "mp_party_id", "parlgov_id")],
  by = "party_id",
  all.x = TRUE
)

# Reduce the ParlGov cabinet view to cabinets that started after 2005-01-01 in
# the countries present in user_data, and give each row an end date: the start
# date of the same party's next row, or 2100-01-01 for the party's last row.
parlgov_data <- parlgov_data %>%
  select(election_date, start_date, cabinet_party, party_id, country_name) %>%
  mutate(start_date = as.Date(start_date)) %>%
  filter(
    start_date > as.Date("2005-01-01"),
    country_name %in% levels(factor(user_data$country))
  ) %>%
  arrange(party_id, start_date) %>%
  group_by(party_id) %>%
  mutate(
    end_date = lead(
      start_date,
      order_by = start_date,
      default = as.Date("2100-01-01")
    )
  ) %>%
  ungroup() %>%
  mutate(
    start_date = as.Date(start_date),
    end_date = as.Date(end_date)
  )

# Flag, for every row of `data`, whether the receiver's and the sender's party
# held cabinet seats in the row's country on the row's date.
#
# Arguments:
#   data          data frame with the columns `date`, `country`,
#                 `parlgov_id_receiver` and `parlgov_id_sender`.
#   parlgov_data  data frame with the columns `country_name`, `party_id`,
#                 `cabinet_party`, `start_date` and `end_date`.
#
# Returns `data` with two numeric columns added (or overwritten):
#   receiver_ingov / sender_ingov  1 if the party id is among the parties with
#                 cabinet_party == 1 whose [start_date, end_date] interval
#                 (both ends inclusive) contains the row's date in the row's
#                 country, 0 if not, NA if the party id itself is NA.
#
# A progress bar is shown when the optional `progress` package is installed;
# the result does not depend on it. Zero-row input is returned with the two
# (empty) columns added.
get_gov <- function(data, parlgov_data) {
  required_cols <- c("date", "country", "parlgov_id_receiver", "parlgov_id_sender")
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "get_gov(): `data` is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  n_rows <- nrow(data)
  # Results are collected in plain vectors and attached once at the end, which
  # avoids modifying the data frame on every iteration.
  receiver_flag <- rep(NA_real_, n_rows)
  sender_flag <- rep(NA_real_, n_rows)

  # Only cabinet parties can ever match, so filter them once up front.
  in_cabinet <- parlgov_data[which(parlgov_data$cabinet_party == 1), ]

  pb <- NULL
  if (n_rows > 0 && requireNamespace("progress", quietly = TRUE)) {
    pb <- progress::progress_bar$new(
      format = "  Processing [:bar] :percent in :elapsed (:eta remaining)",
      total = n_rows, clear = FALSE, width = 60
    )
  }

  for (i in seq_len(n_rows)) {
    if (!is.null(pb)) {
      pb$tick()
    }

    # which() drops NA comparisons (missing country or date), so such rows
    # simply match no cabinet.
    active <- which(
      in_cabinet$country_name == data$country[i] &
        in_cabinet$start_date <= data$date[i] &
        in_cabinet$end_date >= data$date[i]
    )
    gov_parties <- in_cabinet$party_id[active]

    receiver_id <- data$parlgov_id_receiver[i]
    if (!is.na(receiver_id)) {
      receiver_flag[i] <- as.numeric(receiver_id %in% gov_parties)
    }

    sender_id <- data$parlgov_id_sender[i]
    if (!is.na(sender_id)) {
      sender_flag[i] <- as.numeric(sender_id %in% gov_parties)
    }
  }

  data$receiver_ingov <- receiver_flag
  data$sender_ingov <- sender_flag
  data
}

# Keep one row per Manifesto Project party (the first one encountered) with its
# party family.
manifesto_data <- manifesto_data[c("party", "parfam")]
manifesto_data <- manifesto_data[!duplicated(manifesto_data$party), ]

# Label the party-family codes explicitly by code rather than by position, so
# the labels stay correct even if a code is absent from (or added to) the data.
# Codes outside 10-80 become NA, as before.
# NOTE(review): code-to-label mapping follows the eight lowest `parfam` codes
# (10, 20, ..., 80) as listed in the Manifesto Project codebook -- confirm
# against the codebook of the dataset version in use.
manifesto_data$parfam <- factor(
  manifesto_data$parfam,
  levels = c(10, 20, 30, 40, 50, 60, 70, 80),
  labels = c(
    "Ecological", "Socialist", "Social Democratic", "Liberal",
    "Christian Democratic", "Conservative", "Nationalist",
    "Agrarian"
  )
)

# ---- data1: attach party, country and user attributes, then government status ----
# The merges run first because get_gov() and the party_id overrides below read
# the columns they create (`country`, `parlgov_id_*`, `party_id_*`).

# Party identifiers of the sender and the receiver. merge() places the non-key
# columns of the right-hand table last, so they are renamed by position.
data1 <- merge(
  data1, user_data[c("party_id", "mp_party_id", "parlgov_id", "user_id")],
  by.x = "Sender_id", by.y = "user_id", all.x = TRUE
)
colnames(data1)[(ncol(data1) - 2):ncol(data1)] <-
  c("party_id_sender", "mp_id_sender", "parlgov_id_sender")
data1 <- merge(
  data1, user_data[c("party_id", "mp_party_id", "parlgov_id", "user_id")],
  by.x = "Receiver_id", by.y = "user_id", all.x = TRUE
)
colnames(data1)[(ncol(data1) - 2):ncol(data1)] <-
  c("party_id_receiver", "mp_id_receiver", "parlgov_id_receiver")

# Country name for each row.
data1 <- merge(data1, countries[c("country_id", "country")], by = "country_id", all.x = TRUE)

# Follower count, head-of-government flag and cabinet flag of sender and receiver.
data1 <- merge(
  data1, user_data[c("user_id", "followers", "is_head_of_gov", "cabinet_member")],
  by.x = "Sender_id", by.y = "user_id", all.x = TRUE
)
colnames(data1)[(ncol(data1) - 2):ncol(data1)] <-
  c("sender_followers", "sender_hog", "sender_cabinet")
data1 <- merge(
  data1, user_data[c("user_id", "followers", "is_head_of_gov", "cabinet_member")],
  by.x = "Receiver_id", by.y = "user_id", all.x = TRUE
)
colnames(data1)[(ncol(data1) - 2):ncol(data1)] <-
  c("receiver_followers", "receiver_hog", "receiver_cabinet")

# Government status of the sender's and receiver's party on each row's date.
data1 <- get_gov(data1, parlgov_data)

# Rows with party_id 0 are forced to "in government" and rows with party_id 1
# to "not in government". `%in%` is used instead of `df[cond, ]$col <- value`
# because the latter errors when party_id contains NA or when no row matches.
# NOTE(review): presumably 0 and 1 are special placeholder party ids -- confirm.
data1$sender_ingov[data1$party_id_sender %in% 0] <- 1
data1$sender_ingov[data1$party_id_sender %in% 1] <- 0
data1$receiver_ingov[data1$party_id_receiver %in% 0] <- 1
data1$receiver_ingov[data1$party_id_receiver %in% 1] <- 0

write.csv(data1, "data1.csv", row.names = FALSE)

# ---- data2: attach party, country and user attributes, then government status ----

# Party identifiers of the sender and the receiver. merge() places the non-key
# columns of the right-hand table last, so they are renamed by position.
data2 <- merge(
  data2, user_data[c("party_id", "mp_party_id", "parlgov_id", "user_id")],
  by.x = "Sender_id", by.y = "user_id", all.x = TRUE
)
colnames(data2)[(ncol(data2) - 2):ncol(data2)] <-
  c("party_id_sender", "mp_id_sender", "parlgov_id_sender")
data2 <- merge(
  data2, user_data[c("party_id", "mp_party_id", "parlgov_id", "user_id")],
  by.x = "Receiver_id", by.y = "user_id", all.x = TRUE
)
colnames(data2)[(ncol(data2) - 2):ncol(data2)] <-
  c("party_id_receiver", "mp_id_receiver", "parlgov_id_receiver")

# Country name for each row.
data2 <- merge(data2, countries[c("country_id", "country")], by = "country_id", all.x = TRUE)

# Follower count, head-of-government flag and cabinet flag of sender and receiver.
data2 <- merge(
  data2, user_data[c("user_id", "followers", "is_head_of_gov", "cabinet_member")],
  by.x = "Sender_id", by.y = "user_id", all.x = TRUE
)
colnames(data2)[(ncol(data2) - 2):ncol(data2)] <-
  c("sender_followers", "sender_hog", "sender_cabinet")
data2 <- merge(
  data2, user_data[c("user_id", "followers", "is_head_of_gov", "cabinet_member")],
  by.x = "Receiver_id", by.y = "user_id", all.x = TRUE
)
colnames(data2)[(ncol(data2) - 2):ncol(data2)] <-
  c("receiver_followers", "receiver_hog", "receiver_cabinet")

# Government status of the sender's and receiver's party on each row's date.
data2 <- get_gov(data2, parlgov_data)

# Rows with party_id 0 are forced to "in government" and rows with party_id 1
# to "not in government". `%in%` is used instead of `df[cond, ]$col <- value`
# because the latter errors when party_id contains NA or when no row matches.
# NOTE(review): presumably 0 and 1 are special placeholder party ids -- confirm.
data2$sender_ingov[data2$party_id_sender %in% 0] <- 1
data2$sender_ingov[data2$party_id_sender %in% 1] <- 0
data2$receiver_ingov[data2$party_id_receiver %in% 0] <- 1
data2$receiver_ingov[data2$party_id_receiver %in% 1] <- 0

write.csv(data2, "data2.csv", row.names = FALSE)